clear
set more off
macro drop all
capture log close

/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection

Created on: 12/4/17

Last Modified on: 2/20/2024

Description: Create files with information for whether (and when) a child had an
	     investigation before 2008 when the worker data begins  and whether 
	     a child was placed in foster care before 2008.

Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

** Setting the Directory
global rawdata 
global cleandata 
global tmp 

/********************************************************************************

The newest version of the data contains only case information from 
2008-2017 but I can merge the older version which contains information going
all the way back to Aug 1996.  This file does 2 things:

(1) Create a file consisting of a child's most recent complaint before 2008 and the
    number of investigations they had prior to 2008. 
	-this will be used to determine which post 2008 cases could have entered
	the caseworker rotation system

(2) Create a file containing a list of children who were placed into foster care
before 2008.
	-this will be used to exclude post 2008 investigations in my analysis in 
	which the child had previously been placed in foster care

*******************************************************************************/

*********************
**(1) CREATE FILE WITH MOST RECENT PRE 2008 INVESTIGATION DATE
*********************

**Start with the old version of the case file, which contains the complaint dates
use "${rawdata}old_case_raw.dta", clear
rename intakechild vicid 
rename invest inv_caseid
keep vicid complaint_date complaint_year inv_caseid
drop if complaint_date==""
gen complaint_date1=date(complaint_date, "YMD")
drop complaint_date
rename complaint_date1 complaint_date
gen cmonth=month(complaint_date)

tab complaint_year
/*
complaint_y |
        ear |      Freq.     Percent        Cum.
------------+-----------------------------------
       1917 |          1        0.00        0.00
       1919 |         21        0.00        0.00
       1924 |          1        0.00        0.00
       1946 |          2        0.00        0.00
       1949 |          2        0.00        0.00
       1959 |          4        0.00        0.00
       1961 |          3        0.00        0.00
       1962 |          3        0.00        0.00
       1965 |          1        0.00        0.00
       1966 |          2        0.00        0.00
       1973 |          3        0.00        0.00
       1977 |          1        0.00        0.00
       1979 |          1        0.00        0.00
       1980 |          2        0.00        0.00
       1985 |          1        0.00        0.00
       1986 |          1        0.00        0.00
       1988 |          4        0.00        0.00
       1989 |         18        0.00        0.00
       1990 |         24        0.00        0.00
       1991 |         66        0.00        0.01
       1992 |        113        0.00        0.01
       1993 |        194        0.01        0.02
       1994 |      1,567        0.07        0.08
       1995 |      3,702        0.15        0.24
       1996 |     47,774        1.99        2.23
       1997 |    103,699        4.32        6.55
       1998 |    110,875        4.62       11.17
       1999 |    109,017        4.54       15.71
       2000 |    111,522        4.64       20.35
       2001 |    114,674        4.78       25.13
       2002 |    123,066        5.13       30.25
       2003 |    124,111        5.17       35.42
       2004 |    119,074        4.96       40.38
       2005 |    109,820        4.57       44.95
       2006 |    120,203        5.01       49.96
       2007 |    124,646        5.19       55.15
       2008 |    122,792        5.11       60.27
       2009 |    124,150        5.17       65.44
       2010 |    133,974        5.58       71.02
       2011 |    145,043        6.04       77.06
       2012 |    151,855        6.32       83.38
       2013 |    148,654        6.19       89.57
       2014 |    136,380        5.68       95.25
       2015 |    113,978        4.75      100.00
------------+-----------------------------------
      Total |  2,401,044      100.00


*/

tab cmonth if complaint_year==1996

/*

     cmonth |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |        635        1.33        1.33
          2 |        704        1.47        2.80
          3 |        944        1.98        4.78
          4 |      1,193        2.50        7.28
          5 |      1,565        3.28       10.55
          6 |      1,837        3.85       14.40
          7 |      3,397        7.11       21.51
          8 |      5,438       11.38       32.89
          9 |      7,958       16.66       49.55
         10 |      9,528       19.94       69.49
         11 |      7,701       16.12       85.61
         12 |      6,874       14.39      100.00
------------+-----------------------------------
      Total |     47,774      100.00
*/

drop if complaint_year<1996
drop if cmonth<8 & complaint_year==1996

**Keep only information on investigations BEFORE 2008, since I have more reliable
**data starting in 2008
drop if complaint_year>=2008

**Do some basic cleaning of the data, including getting rid of investigations
**which happened very close to each other, which were essentially part of the 
**same case
duplicates drop vicid complaint_date, force
sort vicid complaint_date
gen days_since_previous=complaint_date-complaint_date[_n-1] if vicid==vicid[_n-1]
drop if days_since<30
bysort vicid: gen n_inv_pre2008=_N
la var n_inv_pre2008 "# of Investigations Before 2008"

**Keep only the MOST RECENT investigation for each child
bysort vicid: egen cdate_max=max(complaint_date)
keep vicid cdate_max n_inv_pre2008
rename cdate_max complaint_date_pre2008
format complaint_date_pre2008 %td
la var complaint_date_pre2008 "Date of Most Recent Complaint Before 2008"
egen tag=tag(vicid)
keep if tag==1
drop tag

**Save file containing the first complaint for every child dating back to Aug of 
**1996.
compress
sort vicid
save "${cleandata}complaint_date_pre2008.dta", replace

*********************
**(2) CREATE LIST OF CHILDREN WHO WERE PLACED IN FOSTER CARE BEFORE 2008
*********************
use "${rawdata}old_fcplacements.dta", clear
rename intakechildvicpartyid vicid 
keep vicid removal_date
gen removal_date1=date(removal_date,"YMD")
drop removal_date
rename removal_date1 removal_date
format removal_date %td
gen removal_year=year(removal_date)
keep if removal_year<2008
keep vicid
duplicates drop
save "${cleandata}removal_date_pre2008.dta", replace


















 
